Read prepared data.

subscriptions <- read_rds('../data/subscriptions.rds')
summary(subscriptions)
   customerid       subscriptionid      revenuecurr        billingcurrency    startmonth        
 Min.   :   10006   Min.   :     154   Min.   :      0.8   DKK    :668540   Min.   :2003-12-01  
 1st Qu.: 1090111   1st Qu.: 5121661   1st Qu.:     15.0   EUR    :532574   1st Qu.:2012-12-01  
 Median : 5319576   Median :14784414   Median :     63.2   USD    :505039   Median :2014-12-01  
 Mean   : 8044651   Mean   :14612438   Mean   :   1382.8   NOK    :232550   Mean   :2014-05-02  
 3rd Qu.:14410806   3rd Qu.:24044332   3rd Qu.:    174.0   SEK    :159059   3rd Qu.:2016-07-01  
 Max.   :23917123   Max.   :29424356   Max.   :2823000.0   GBP    : 96217   Max.   :2018-03-01  
                                                           (Other): 89185                       
    endmonth              months          status        num_previous_months num_previous_subs
 Min.   :2004-03-01   Min.   : 1.000   active:1840623   Min.   :  0.00      Min.   : 0.000   
 1st Qu.:2013-04-01   1st Qu.: 1.000   churn : 442541   1st Qu.:  1.00      1st Qu.: 1.000   
 Median :2015-03-01   Median : 3.000                    Median :  8.00      Median : 3.000   
 Mean   :2014-08-18   Mean   : 3.532                    Mean   : 15.34      Mean   : 6.028   
 3rd Qu.:2016-11-01   3rd Qu.: 3.000                    3rd Qu.: 22.00      3rd Qu.: 8.000   
 Max.   :2020-03-01   Max.   :24.000                    Max.   :162.00      Max.   :68.000   
                                                                                             
 num_previous_months_binned firstpaiddate        channelcat      paymentperiodchosenatstart    currency     
 Min.   : 0.00              Min.   :2003-12-15   paid :1518216   Min.   :-1.000             DKK    :668679  
 1st Qu.: 1.00              1st Qu.:2010-12-12   viral: 764948   1st Qu.: 1.000             USD    :543314  
 Median : 8.00              Median :2013-06-23                   Median : 3.000             EUR    :537318  
 Mean   :14.72              Mean   :2013-01-16                   Mean   : 4.237             NOK    :232433  
 3rd Qu.:26.00              3rd Qu.:2015-08-19                   3rd Qu.: 3.000             SEK    :158858  
 Max.   :39.00              Max.   :2018-03-26                   Max.   :24.000             GBP    : 93847  
                                                                                            (Other): 48715  
   marketname       siteverkey     firstpaidmonth        firstdevice          segment        isquickpurchase 
 DK     :669017   US     :898770   Min.   :2003-12-01   desktop: 175519   business: 407581   Min.   :0.0000  
 NO     :232881   DK     :637715   1st Qu.:2010-12-01   mobile :  70157   other   :  97467   1st Qu.:0.0000  
 US     :188723   NO     :207482   Median :2013-06-01   NA's   :2037488   personal: 293518   Median :1.0000  
 SE     :159905   SE     :144173   Mean   :2013-01-02                     NA's    :1484598   Mean   :0.5282  
 FR     :146174   FR     : 96298   3rd Qu.:2015-08-01                                        3rd Qu.:1.0000  
 (Other):886057   NL     : 79701   Max.   :2018-03-01                                        Max.   :1.0000  
 NA's   :   407   (Other):219025                                                             NA's   :306     
 productversion      isfreemium                                model31224        market_category  
 v_3    : 898821   Min.   :0.0000   pre-changes                     :1894566   DK        :669017  
 v_4    : 138959   1st Qu.:0.0000   3-12m-v1-2015-11-30             : 145756   LowGeneric:265151  
 v_older:1245078   Median :0.0000   3-12-24m-2016-08-25             : 144551   NO        :232881  
 NA's   :    306   Mean   :0.2547   12m-v2-2015-05-16               :  71866   US        :188723  
                   3rd Qu.:1.0000   ex-subscriptionplanpaywallexpand:   8305   SE        :159905  
                   Max.   :1.0000   ex-uglyjerry12mswitch           :   5388   FR        :146174  
                   NA's   :306      (Other)                         :  12732   (Other)   :621313  
 siteverkey_cat siteverkey_cat2 chosen_subs_length  gdppercapita      gdppercapita_scaled
 ORG:1384394    MUT:1311010     1  :  56935        Min.   :   218.3   Min.   :-2.4437    
 SS : 898770    ORG: 402042     -1 :  11140        1st Qu.: 42013.3   1st Qu.:-0.4694    
                SS : 570112     12 :  51360        Median : 55670.9   Median : 0.1758    
                                24 :   6897        Mean   : 51950.1   Mean   : 0.0000    
                                3  : 204319        3rd Qu.: 60637.3   3rd Qu.: 0.4104    
                                gen:1952513        Max.   :108422.5   Max.   : 2.6676    
                                                                                         
                           subscription_summary            subscription_summary_no_market
 mc-DK_ssc-MUT_ac-39_m-3_ccsl-gen    : 109608   ssc-MUT_ac-26_m-3_ccsl-gen: 203834       
 mc-DK_ssc-MUT_ac-26_m-3_ccsl-gen    :  77645   ssc-MUT_ac-39_m-3_ccsl-gen: 170628       
 mc-LowGeneric_ssc-SS_ac-0_m-1_ccsl-3:  60652   ssc-SS_ac-0_m-1_ccsl-3    : 161585       
 mc-DK_ssc-ORG_ac-0_m-3_ccsl-gen     :  58390   ssc-ORG_ac-0_m-3_ccsl-gen : 132393       
 mc-DK_ssc-MUT_ac-38_m-3_ccsl-gen    :  51997   ssc-MUT_ac-8_m-3_ccsl-gen : 120482       
 mc-DK_ssc-ORG_ac-3_m-3_ccsl-gen     :  41933   ssc-MUT_ac-38_m-3_ccsl-gen: 111133       
 (Other)                             :1882939   (Other)                   :1383109       
subscriptions_with_target <- subscriptions %>%
  # restrict to a recent expiry window
  filter(endmonth >= begin_train_window & endmonth < end_window) %>%
  mutate(num_previous_months_binned_fct = as.factor(num_previous_months_binned)) %>%
  mutate(set_type = as.factor(if_else(endmonth >= begin_validation_window, 'validation', 'training'))) %>%
  
  mutate(churnind = ifelse(status == 'churn', 1, 0)) 

Prepare churntable that we want to predict.

churntable <- subscriptions_with_target %>%
  
  group_by(set_type, siteverkey_cat2, market_category, months, num_previous_months_binned, chosen_subs_length, subscription_summary_no_market) %>%
  summarise(num_obs = n(), 
            churned = sum(churnind)) %>%
  
  group_by(set_type) %>%
  mutate(churn_rate = churned / num_obs,
         renew_rate = 1 - churn_rate,
         month_churn = 1 - renew_rate ^ (1/as.double(months)),
         log_month_churn = log(month_churn),
         weight = num_obs / sum(num_obs))
# NB! Does this introduce a bad bias ????
churntable_no_zeros <- churntable %>%
  filter(churn_rate > 0)

Train model

new_model=glm(log_month_churn ~ market_category + subscription_summary_no_market, data=churntable_no_zeros[churntable_no_zeros$set_type == 'training', ], weights = weight)

Model validation for training (2017-01-01 - 2017-08-01) and validation (2017-09-01 - 2018-01-01) sets:

prediction_table <- validation(subscriptions_with_target, new_model, predict_2fct_model)

validation_plots(prediction_table)
NAs introduced by coercion

Try simple logistic model

model_logit <- glm(churnind ~ market_category + siteverkey_cat2 + num_previous_months_binned + months + chosen_subs_length,
                   data = subscriptions_with_target[subscriptions_with_target$set_type == 'training',], family = 'binomial')

Model validation for training (2017-01-01 - 2017-08-01) and validation (2017-09-01 - 2018-01-01) sets:

prediction_table_logit <- validation(subscriptions_with_target, model_logit)

validation_plots(prediction_table_logit)
NAs introduced by coercion

LS0tCnRpdGxlOiAiQ2h1cm46IFR3byBkaW1lbnNpb25zIGFuZCBwcmVkaWN0IGxvZyBvZiBtb250aGx5IGNodXJuIHByb2JhYmlsaXR5IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgpgYGB7ciBzZXR1cCwgaW5jbHVkZT1GQUxTRX0Kc291cmNlKCdjb25maWcuUicpCnNvdXJjZSgndXRpbHMuUicpCnNvdXJjZSgndXRpbHNfdmFsaWRhdGlvbi5SJykKCiMgc291cmNlKCduZXdfbGlmZXRpbWVwcmVkaWN0b3IuUicpCmBgYAoKUmVhZCBwcmVwYXJlZCBkYXRhLgoKYGBge3IgcmVhZGluZywgdGlkeT1GfQpzdWJzY3JpcHRpb25zIDwtIHJlYWRfcmRzKCcuLi9kYXRhL3N1YnNjcmlwdGlvbnMucmRzJykKYGBgCgpgYGB7cn0Kc3VtbWFyeShzdWJzY3JpcHRpb25zKQpgYGAKCmBgYHtyfQpzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0IDwtIHN1YnNjcmlwdGlvbnMgJT4lCiAgIyByZXN0cmljdCB0byBhIHJlY2VudCBleHBpcnkgd2luZG93CiAgZmlsdGVyKGVuZG1vbnRoID49IGJlZ2luX3RyYWluX3dpbmRvdyAmIGVuZG1vbnRoIDwgZW5kX3dpbmRvdykgJT4lCiAgbXV0YXRlKG51bV9wcmV2aW91c19tb250aHNfYmlubmVkX2ZjdCA9IGFzLmZhY3RvcihudW1fcHJldmlvdXNfbW9udGhzX2Jpbm5lZCkpICU+JQogIG11dGF0ZShzZXRfdHlwZSA9IGFzLmZhY3RvcihpZl9lbHNlKGVuZG1vbnRoID49IGJlZ2luX3ZhbGlkYXRpb25fd2luZG93LCAndmFsaWRhdGlvbicsICd0cmFpbmluZycpKSkgJT4lCiAgCiAgbXV0YXRlKGNodXJuaW5kID0gaWZlbHNlKHN0YXR1cyA9PSAnY2h1cm4nLCAxLCAwKSkgCmBgYAoKClByZXBhcmUgY2h1cm50YWJsZSB0aGF0IHdlIHdhbnQgdG8gcHJlZGljdC4KCmBgYHtyfQpjaHVybnRhYmxlIDwtIHN1YnNjcmlwdGlvbnNfd2l0aF90YXJnZXQgJT4lCiAgCiAgZ3JvdXBfYnkoc2V0X3R5cGUsIHNpdGV2ZXJrZXlfY2F0MiwgbWFya2V0X2NhdGVnb3J5LCBtb250aHMsIG51bV9wcmV2aW91c19tb250aHNfYmlubmVkLCBjaG9zZW5fc3Vic19sZW5ndGgsIHN1YnNjcmlwdGlvbl9zdW1tYXJ5X25vX21hcmtldCkgJT4lCiAgc3VtbWFyaXNlKG51bV9vYnMgPSBuKCksIAogICAgICAgICAgICBjaHVybmVkID0gc3VtKGNodXJuaW5kKSkgJT4lCiAgCiAgZ3JvdXBfYnkoc2V0X3R5cGUpICU+JQogIG11dGF0ZShjaHVybl9yYXRlID0gY2h1cm5lZCAvIG51bV9vYnMsCiAgICAgICAgIHJlbmV3X3JhdGUgPSAxIC0gY2h1cm5fcmF0ZSwKICAgICAgICAgbW9udGhfY2h1cm4gPSAxIC0gcmVuZXdfcmF0ZSBeICgxL2FzLmRvdWJsZShtb250aHMpKSwKICAgICAgICAgbG9nX21vbnRoX2NodXJuID0gbG9nKG1vbnRoX2NodXJuKSwKICAgICAgICAgd2VpZ2h0ID0gbnVtX29icyAvIHN1bShudW1fb2JzKSkKCiMgTkIhIERvZXMgdGhpcyBpbnRyb2R1Y2UgYSBiYWQgYmlhcyA/Pz8/CmNodXJudGFibGVfbm9femVyb3MgPC0gY2h1cm50YWJsZSAlPiUKICBmaWx0ZXIoY2h1cm5fcmF0ZSA+IDApCmBgYAoKVHJhaW4gbW9kZWwKCmBgYHtyfQpuZXdfbW9kZWw9Z2xtKGxvZ19tb250aF9jaHVybiB+IG1hcmtldF9jYXRlZ29yeSArIHN1YnNjcmlwdGlvbl9zdW1tYXJ5X25vX21hcmtldCwgZGF0YT1jaHVybnRhYmxlX25vX3plcm9zW2NodXJudGFibGVfbm9femVyb3Mkc2V0X3R5cGUgPT0gJ3RyYWluaW5nJywgXSwgd2VpZ2h0cyA9IHdlaWdodCkKYGBgCgpNb2RlbCB2YWxpZGF0aW9uIGZvciB0cmFpbmluZyAoMjAxNy0wMS0wMSAtIDIwMTctMDgtMDEpIGFuZCB2YWxpZGF0aW9uICgyMDE3LTA5LTAxIC0gMjAxOC0wMS0wMSkgc2V0czoKCiogU3VtbWFyeSB0YWJsZSBjb250YWluaW5nCiAgICArIE51bWJlciBvZiBvYnNlcnZhdGlvbnMgd2l0aG91dCBwcmVkaWN0aW9uCiAgICArIEFVQywgbG9nbG9zcyAtIHByZWRpY3Rpb24gcXVhbGl0eSBtZXRyaWNzCiogUk9DIGN1cnZlCiogUGxvdHMgcGVyIG1hcmtldAogICAgKyBBZ2Ugb2YgY3VzdG9tZXJzIHZzIHJlYWwgYW5kIHByZWRpY3RlZCBwcm9iYWJpbGl0eSBvZiBjaHVybiBmb3IgZGlmZmVyZW50IHN1YnNjcmlwdGlvbiBsZW5ndGhzLiBJdCBzaG93cyBpZiB3ZSBhcmUgY29ycmVjdGx5IHByZWRpY3RpbmcgcHJvYmFiaWxpdHkgb2YgY2h1cm4gZm9yIGN1c3RvbWVycyBvdmVyIGxpZmV0aW1lLgogICAgKyBDYWxpYnJhdGlvbiAtIFByZWRpY3RlZCBwcm9iYWJpbGl0eSBvZiBjaHVybiB2cyByZWFsIHByb2JhYmlsaXR5IG9mIGNodXJuIGZvciBkaWZmZXJlbnQgc3Vic2NyaXB0aW9uIGxlbmd0aHMgKHdlbGwgY2FsaWJyYXRlZCBwcmVkaWN0aW9uIHNob3VsZCBmb3JtIGEgZGlhZ29uYWwgbGluZSkuIFNob3dzIGlmIG91dGNvbWUgb2YgbW9kZWwgaW4gcXVlc3Rpb24gY2FuIGJlIHJlYWx5IHRyZWF0ZWQgYXMgcHJvYmFiaWxpdHkuCgpgYGB7ciB3YXJuaW5nPUZ9CnByZWRpY3Rpb25fdGFibGUgPC0gdmFsaWRhdGlvbihzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0LCBuZXdfbW9kZWwsIHByZWRpY3RfMmZjdF9tb2RlbCkKYGBgCgpgYGB7ciBmaWcuaGVpZ2h0PTEwLCBmaWcud2lkdGg9Nywgd2FybmluZz1GfQp2YWxpZGF0aW9uX3Bsb3RzKHByZWRpY3Rpb25fdGFibGUpCmBgYAoKVHJ5IHNpbXBsZSBsb2dpc3RpYyBtb2RlbApgYGB7cn0KbW9kZWxfbG9naXQgPC0gZ2xtKGNodXJuaW5kIH4gbWFya2V0X2NhdGVnb3J5ICsgc2l0ZXZlcmtleV9jYXQyICsgbnVtX3ByZXZpb3VzX21vbnRoc19iaW5uZWQgKyBtb250aHMgKyBjaG9zZW5fc3Vic19sZW5ndGgsCiAgICAgICAgICAgICAgICAgICBkYXRhID0gc3Vic2NyaXB0aW9uc193aXRoX3RhcmdldFtzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0JHNldF90eXBlID09ICd0cmFpbmluZycsXSwgZmFtaWx5ID0gJ2Jpbm9taWFsJykKYGBgCgoKTW9kZWwgdmFsaWRhdGlvbiBmb3IgdHJhaW5pbmcgKDIwMTctMDEtMDEgLSAyMDE3LTA4LTAxKSBhbmQgdmFsaWRhdGlvbiAoMjAxNy0wOS0wMSAtIDIwMTgtMDEtMDEpIHNldHM6CgoqIFN1bW1hcnkgdGFibGUgY29udGFpbmluZwogICAgKyBOdW1iZXIgb2Ygb2JzZXJ2YXRpb25zIHdpdGhvdXQgcHJlZGljdGlvbgogICAgKyBBVUMsIGxvZ2xvc3MgLSBwcmVkaWN0aW9uIHF1YWxpdHkgbWV0cmljcwoqIFJPQyBjdXJ2ZQoqIFBsb3RzIHBlciBtYXJrZXQKICAgICsgQWdlIG9mIGN1c3RvbWVycyB2cyByZWFsIGFuZCBwcmVkaWN0ZWQgcHJvYmFiaWxpdHkgb2YgY2h1cm4gZm9yIGRpZmZlcmVudCBzdWJzY3JpcHRpb24gbGVuZ3Rocy4gSXQgc2hvd3MgaWYgd2UgYXJlIGNvcnJlY3RseSBwcmVkaWN0aW5nIHByb2JhYmlsaXR5IG9mIGNodXJuIGZvciBjdXN0b21lcnMgb3ZlciBsaWZldGltZS4KICAgICsgQ2FsaWJyYXRpb24gLSBQcmVkaWN0ZWQgcHJvYmFiaWxpdHkgb2YgY2h1cm4gdnMgcmVhbCBwcm9iYWJpbGl0eSBvZiBjaHVybiBmb3IgZGlmZmVyZW50IHN1YnNjcmlwdGlvbiBsZW5ndGhzICh3ZWxsIGNhbGlicmF0ZWQgcHJlZGljdGlvbiBzaG91bGQgZm9ybSBhIGRpYWdvbmFsIGxpbmUpLiBTaG93cyBpZiBvdXRjb21lIG9mIG1vZGVsIGluIHF1ZXN0aW9uIGNhbiBiZSByZWFseSB0cmVhdGVkIGFzIHByb2JhYmlsaXR5LgpgYGB7ciB3YXJuaW5nPUZ9CnByZWRpY3Rpb25fdGFibGVfbG9naXQgPC0gdmFsaWRhdGlvbihzdWJzY3JpcHRpb25zX3dpdGhfdGFyZ2V0LCBtb2RlbF9sb2dpdCkKYGBgCgpgYGB7ciBmaWcuaGVpZ2h0PTEwLCBmaWcud2lkdGg9Nywgd2FybmluZz1GfQp2YWxpZGF0aW9uX3Bsb3RzKHByZWRpY3Rpb25fdGFibGVfbG9naXQpCmBgYA==